# Install and load packages
# requireNamespace() only checks whether pacman is available; unlike require()
# it does not attach the package, which is unnecessary because every call
# below is namespaced with pacman::.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

# p_load() installs any package in this list that is missing, then attaches it.
# Each package is listed once (openintro was previously listed twice).
pacman::p_load(
  tidyverse,
  glue,
  scales,
  countdown,
  ggthemes,
  gt,
  palmerpenguins,
  openintro,
  ggrepel,
  patchwork,
  quantreg,
  janitor,
  colorspace,
  broom,
  fs,
  here,
  gghighlight,
  lubridate,
  ggridges,
  gtable,
  ggimage,
  png,
  ggpubr
)

# dsbox is installed from GitHub (tidyverse/dsbox), so it is kept out of the
# p_load() list above. p_load_gh() installs it only when it is not already
# installed and then attaches it, instead of reinstalling on every run.
pacman::p_load_gh("tidyverse/dsbox")
# Set theme for ggplot2
ggplot2::theme_set(ggplot2::theme_minimal(base_size = 14))

# Set width of code output
options(width = 65)

# Set figure parameters for knitr
# NOTE(review): per the knitr chunk-option docs, fig.retina multiplies dpi for
# HTML output, so these two settings combine (300 * 3) — confirm that is the
# intended resolution.
knitr::opts_chunk$set(
  fig.width = 7,        # 7" width
  fig.asp = 0.618,      # the golden ratio
  fig.retina = 3,       # dpi multiplier for displaying HTML output on retina
  fig.align = "center", # center align figures
  dpi = 300             # higher dpi, sharper image
)
# HW 03
# INFO 526 Summer 2025
# 0 - Setup
# 1 - Du Bois challenge.
# Loading in the data
# Reads the income table from data/income.csv (path relative to the working
# directory) and prints its structure.
income <- read_csv("data/income.csv")
income |>
  glimpse()
# Printed output of the call above, kept as comments so the script parses:
#> Rows: 7
#> Columns: 7
#> $ Class <chr> "$100-200", "$200-300", "$300-400", "$40…
#> $ Average_Income <dbl> 139.10, 249.45, 335.66, 433.82, 547.00, …
#> $ Rent <dbl> 19, 22, 23, 18, 13, 0, 0
#> $ Food <dbl> 43, 47, 43, 37, 31, 37, 29
#> $ Clothes <dbl> 28, 23, 18, 15, 17, 19, 16
#> $ Tax <dbl> 9.9, 4.0, 4.5, 5.5, 5.0, 8.0, 4.5
#> $ Other <dbl> 0.1, 4.0, 11.5, 24.5, 34.0, 36.0, 50.5
# Correcting the data
# Row 1 is read with Tax = 9.9 and Other = 0.1; the correction sets them to
# 0.1 and 9.9. Columns are addressed by name (previously positions 6 and 7) so
# the fix does not depend on the column order of the CSV.
income[1, "Tax"] <- 0.1
income[1, "Other"] <- 9.9 # Source: Marcel Hebing, StackOverflow

# Pivoting the data long
# One row per Class x expenditure category; the category name goes to
# `category` and its value to `expenditure`. glimpse() prints the result and
# passes it through to the assignment.
income_clean <- income |>
  pivot_longer(
    cols = c("Rent", "Food", "Clothes", "Tax", "Other"),
    names_to = "category",
    values_to = "expenditure"
  ) |>
  glimpse()
# Printed output of the call above, kept as comments so the script parses:
#> Rows: 35
#> Columns: 4
#> $ Class <chr> "$100-200", "$100-200", "$100-200", "$10…
#> $ Average_Income <dbl> 139.10, 139.10, 139.10, 139.10, 139.10, …
#> $ category <chr> "Rent", "Food", "Clothes", "Tax", "Other…
#> $ expenditure <dbl> 19.0, 43.0, 28.0, 0.1, 9.9, 22.0, 47.0, …
# URL of the image later passed to ggbackground() as the plot background
image <- "https://cdn.pixabay.com/photo/2012/12/06/06/27/paper-68829_1280.jpg"

# Put both factors in an explicit order, then reverse that order.
# NOTE(review): the reversal presumably places "$100-200" at the top once the
# axes are flipped and stacks Rent nearest zero — confirm against the plot.
income_clean <- income_clean |>
  mutate(
    Class = fct_rev(
      fct_relevel(
        Class,
        "$100-200",
        "$200-300",
        "$300-400",
        "$400-500",
        "$500-750",
        "$750-1000",
        "$1000 AND OVER"
      )
    ),
    category = fct_rev(
      fct_relevel(category, "Rent", "Food", "Clothes", "Tax", "Other")
    )
  )
# Label positions: within each Class, the running total of expenditure minus
# half of the current value, i.e. the midpoint of each stacked segment in row
# order. Source: R Graphics Codebook
income_clean <- income_clean |>
  group_by(Class) |>
  mutate(label_y = cumsum(expenditure) - expenditure / 2) |>
  ungroup()

# Label data: every category except Rent and Tax, with the expenditure value
# formatted as a percentage string in `expend2`.
income_clean2 <- income_clean |>
  filter(category != "Rent", category != "Tax") |>
  mutate(
    perc = "%",
    expend2 = glue("{expenditure}{perc}")
  )
# Plotting
# Horizontal stacked bars of expenditure share per income class, with
# percentage labels drawn on the segments.
# Sources:
#   stacked bars: Geeks for Geeks (https://www.geeksforgeeks.org/r-language/grouped-stacked-and-percent-stacked-barplot-in-ggplot2/)
#   legend order: https://www.statology.org/ggplot-legend-order/
#   legend keys:  https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
plot <- ggplot(
  data = income_clean,
  mapping = aes(x = Class, y = expenditure, fill = category)
) +
  geom_col(width = 0.5) +
  # Labels for Food, Clothes and Other come from the prepared label data
  geom_text(
    data = income_clean2,
    mapping = aes(y = label_y, label = expend2),
    color = "black",
    size = 3,
    family = "mono"
  ) +
  labs(
    title = "INCOME AND EXPENDITURE OF 150 NEGRO FAMILIES IN ATLANTA, GA., U.S.A.",
    x = NULL,
    y = NULL
  ) +
  # Rent labels, placed by hand in white (Rent is filled black below)
  annotate(
    geom = "text",
    x = c(
      "$100-200",
      "$200-300",
      "$300-400",
      "$400-500",
      "$500-750"
    ),
    y = c(9.5, 11, 11.5, 9, 6.5),
    label = c("19%", "22%", "23%", "18%", "13%"),
    size = 3,
    color = "white",
    family = "mono"
  ) +
  # Tax labels, placed by hand in a smaller size
  annotate(
    geom = "text",
    x = c(
      "$200-300",
      "$300-400",
      "$400-500",
      "$500-750",
      "$750-1000",
      "$1000 AND OVER"
    ),
    y = c(94, 86.25, 72.75, 63.5, 60, 47.25),
    label = c("4%", "4.5%", "5.5%", "5%", "8%", "4.5%"),
    size = 2,
    color = "black",
    family = "mono"
  ) +
  # Column header text anchored to the "$100-200" bar at y = 0; the trailing
  # newlines presumably push it above that bar — confirm in the rendered plot
  annotate(
    geom = "text",
    x = "$100-200",
    y = 0,
    label = "CLASS ACTUAL AVERAGE \n\n\n",
    size = 2,
    color = "black",
    family = "mono"
  ) +
  # Axis labels are given positionally, one per Class level
  scale_x_discrete(
    labels = c(
      "1,000 $1,125 \nAND OVER ",
      "$750-1000 $880 ",
      "$500-750 $547 ",
      "$400-500 $433.82",
      "$300-400 $335.66",
      "$200-300 $249.45",
      "$100-200 $139.10"
    )
  ) +
  scale_fill_manual(
    breaks = c("Rent", "Food", "Clothes", "Tax", "Other"),
    values = c("black", "purple", "sienna1", "slategray1", "snow2")
  ) +
  coord_flip(clip = "off") +
  theme(
    text = element_text(family = "mono"),
    plot.title = element_text(size = 10, hjust = 0.5, face = "bold"),
    plot.title.position = "plot",
    panel.grid = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 7, color = "black"),
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(size = 9),
    legend.key.size = unit(0.3, "cm")
  )

# Draw the plot over the background image.
# Source: Guangchuang Yu (https://guangchuangyu.github.io/2018/04/setting-ggplot2-background-with-ggbackground/)
ggbackground(plot, image)